package opt_web_crawlers

import (
	"github.com/gocolly/colly"
	"github.com/gocolly/colly/debug"
	"log"
	"regexp"
	"strings"
)

// Opt_colly crawls a fixed Zhihu column article and requests every <img>
// on the page whose src attribute passes check_url.
//
// The collector is created with debug logging enabled, an allow-list of the
// zhuanlan.zhihu.com hosts, URL revisiting enabled, a match-everything URL
// filter, and a desktop Chrome user agent string.
//
// The function returns nothing; failures to start a request are written to
// the standard logger instead of being silently dropped.
func Opt_colly() {
	collector := colly.NewCollector(
		colly.Debugger(&debug.LogDebugger{}),

		colly.AllowedDomains("zhuanlan.zhihu.com", "www.zhuanlan.zhihu.com"),
		colly.AllowURLRevisit(),
		colly.URLFilters(regexp.MustCompile(".*")),
		colly.UserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"),
	)

	// Parse the HTML: inspect each <img> element and follow image links.
	collector.OnHTML("img", func(e *colly.HTMLElement) {
		src := e.Attr("src")
		if !check_url(src) {
			return
		}

		// The src attribute may be relative or protocol-relative; resolve it
		// against the URL of the page it was found on before requesting it.
		link := e.Request.AbsoluteURL(src)
		if link == "" {
			log.Printf("img src %q could not be resolved to an absolute URL", src)
			return
		}

		log.Println("img src = ", link)
		// Visit reports an error when the request cannot be made (for example
		// when the collector's domain allow-list rejects the host), so surface
		// it rather than ignoring it.
		if err := collector.Visit(link); err != nil {
			log.Printf("visit %q: %v", link, err)
		}
	})

	const startURL = "https://zhuanlan.zhihu.com/p/419646304"
	if err := collector.Visit(startURL); err != nil {
		log.Printf("visit %q: %v", startURL, err)
	}
}

// check_url reports whether link contains the substring "jpg" or "png".
// The match is case-sensitive and may occur anywhere in the string, not
// only in a file extension.
func check_url(link string) bool {
	for _, marker := range [...]string{"jpg", "png"} {
		if strings.Contains(link, marker) {
			return true
		}
	}
	return false
}
